######################
#  Replication code  #
#  for "Conflict on  #
#  the urban fringe" #
#(Gizelis, Pickering #
#     and Urdal)     #
######################

# set working directory and load data

	setwd("your_path_here")

# load main data grid

	grid <- read.csv("grid_v_023.txt", header=T, sep=";")

# load just the urban cells

	urban4 <- read.table("urban4_v_023_12months.csv", sep=",",header=T)
	
# load stargazer for presentation of regression tables

	library(stargazer)

######################
#      Table 1       #
######################

# All types

	# Cell count (all years)
	
	r1a <- length(grid$gridYearId)
	
	# Cell years experiencing unrest
	
	r1b <- length(grid$gridYearId[grid$scad1.6 > 0])
	
	# Percentage of cell years experiencing unrest
	
	r1c <- round(length(grid$gridYearId[grid$scad1.6 > 0]) / length(grid$gridYearId) * 100, digits=1)
	
	# Total unrest events in cells
	
	r1d <- sum(grid$scad1.6)
	
	# Percentage of all unrest events
	
	r1e <- round(sum(grid$scad1.6[grid$scad1.6 > 0]) / sum(grid$scad1.6) * 100, digits=1)
	
# Core urban

	# Cell count (all years)
	
	r2a <- length(grid$gridYearId[grid$urban1 == 1])
	
	# Cell years experiencing unrest
	
	r2b <- length(grid$gridYearId[grid$scad1.6 > 0 & grid$urban1 == 1])
	
	# Percentage of cell years experiencing unrest
	
	r2c <- round(length(grid$gridYearId[grid$scad1.6 > 0 & grid$urban1 == 1]) / length(grid$gridYearId[grid$urban1 == 1]) * 100, digits=1)
	
	# Total unrest events in cells
	
	r2d <- sum(grid$scad1.6[grid$urban1 == 1])
	
	# Percentage of all unrest events
	
	r2e <- round(sum(grid$scad1.6[grid$scad1.6 > 0 & grid$urban1 == 1]) / sum(grid$scad1.6) * 100, digits=1)
	
# Peri-urban

	# Cell count (all years) 
	
	r3a <- length(grid$gridYearId[grid$periUrban41 == 1])
	
	# Cell years experiencing unrest
	
	r3b <- length(grid$gridYearId[grid$scad1.6 > 0 & grid$periUrban41 == 1])
	
	# Percentage of cell years experiencing unrest
	
	r3c <- round(length(grid$gridYearId[grid$scad1.6 > 0 & grid$periUrban41 == 1]) / length(grid$gridYearId[grid$periUrban41 == 1]) * 100, digits=1)
	
	# Total unrest events in cells
	
	r3d <- sum(grid$scad1.6[grid$periUrban41 == 1])
	
	# Percentage of all unrest events
	
	r3e <- round(sum(grid$scad1.6[grid$scad1.6 > 0 & grid$periUrban41 == 1]) / sum(grid$scad1.6) * 100, digits=1)

# Rural 

	# Cell count (all years) 
	
	r4a <- length(grid$gridYearId[grid$urban4 == 0])
	
	# Cell years experiencing unrest
	
	r4b <- length(grid$gridYearId[grid$scad1.6 > 0 & grid$urban4 == 0])
	
	# Percentage of cell years experiencing unrest
	
	r4c <- round(length(grid$gridYearId[grid$scad1.6 > 0 & grid$urban4 == 0]) / length(grid$gridYearId[grid$urban4 == 0]) * 100, digits=1)
	
	# Total unrest events in cells
	
	r4d <- sum(grid$scad1.6[grid$urban4 == 0])
	
	# Percentage of all unrest events
	
	r4e <- round(sum(grid$scad1.6[grid$scad1.6 > 0 & grid$urban4 == 0]) / sum(grid$scad1.6) * 100, digits=1)
	
# Make descriptive table

cellType <- c("All types", "Core urban", "Peri-urban", "Rural") 
cellCount <- c(r1a, r2a, r3a, r4a)
cellYears <- c(r1b, r2b, r3b, r4b)
pcCellYears <- c(r1c, r2c, r3c, r4c)
totalUnrest <- c(r1d, r2d, r3d, r4d) 
pcUnrest <- c(r1e, r2e, r3e, r4e) 

descriptive <- data.frame(cellType, cellCount, cellYears, pcCellYears, totalUnrest, pcUnrest)

descriptive 

######################
#      Table 2       #
######################

# logit, including spatial lag variable, scadLag

	print('# logit, including spatial lag variable, scadLag')
	reg4 <- glm(scad1.6>0 ~ pgr + grp2 + urban1 + lagddr + droughtProxy5
		+ gridYear + infraProxy14 
		+ gridDistToCapKm +  cgp + scadLag,
    		family=binomial,data=urban4)

	summary(reg4)
	round(summary(reg4)$coef,3)

# logit base model excluding 2 drought variables

	print('#logit based model excluding 2 drought variables')
	reg4a <-glm(scad1.6>0 ~  pgr + grp2 +  urban1 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + scadLag,
		family=binomial,data=urban4)

	summary(reg4a)
	round(summary(reg4a)$coef,3)


# logit, periUrban41 only

	print('# logit, periUrban41 only')
	reg4.pU <- glm(scad1.6>0 ~ pgr +  grp2 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + scadLag,
		family=binomial,data=urban4,subset=periUrban41==1)
	
	summary(reg4.pU)
	round(summary(reg4.pU)$coef,3)

# logit, urban 1 only

	print('# logit, urban 1 only')
	reg4.u1 <- glm(scad1.6>0 ~ pgr + grp2 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + scadLag,
		family=binomial,data=urban4,subset=urban1==1)
	
	summary(reg4.u1)
	round(summary(reg4.u1)$coef,3)

# Format table

# Output to screen

	stargazer(reg4a, reg4, reg4.pU, reg4.u1, type="text",
	          covariate.labels = c("Grid pop growth", "Grid population", 
	                               "Urban core", "Distance from drought area (lag) (past 12 months)", "Drought (past 12 months)", "Grid year", 
	                               "Density of infrastructure", 
	                               "Grid distance to capital (km)", 
	                               "Log GDP grid per capita", 
	                               "SCAD (lag)", "Constant"))
# Output to html file
	
	stargazer(reg4a, reg4, reg4.pU, reg4.u1,  type="html", 
	          out="table2.html",
	          covariate.labels = c("Grid pop growth", "Grid population", 
	                               "Urban core", "Distance from drought area (lag) (past 12 months)", "Drought (past 12 months)", "Grid year", 
	                               "Density of infrastructure", 
	                               "Grid distance to capital (km)", 
	                               "Log GDP grid per capita", 
	                               "SCAD (lag)", "Constant"))
	                               
######################
#     Figure 7       #
#                    #
######################

hypDat = data.frame(
	lagddr=median(urban4$lagddr,na.rm=T), 
	grp2=median(urban4$grp, na.rm=T), 
	pgr=seq(min(urban4$pgr, na.rm=T), 
	max(urban4$pgr, na.rm=T), by=0.05), 
	urban1=median(urban4$urban1, na.rm=T), 
	droughtProxy5=median(urban4$droughtProxy5, na.rm=T), 
	gridYear=median(urban4$gridYear, na.rm=T), 
	infraProxy14=median(urban4$infraProxy14, na.rm=T), 
	gridDistToCapKm=median(urban4$gridDistToCapKm, na.rm=T), 
	cgp=median(urban4$cgp, na.rm=T), 
	scadLag=median(urban4$scadLag, na.rm=T))

# + grp2 + pgr +  urban1 + droughtProxy5
#	+  gridYear + infraProxy14 
#	+ gridDistToCapKm +  cgp + scadLag

preg4 <- predict(reg4, newdata=hypDat, type="response", se.fit=T)

png("fig7.png", width=1400, height=1400, res=150)

plot(hypDat$pgr, preg4$fit, type="l", lwd=3, ylim=c(-0.01,0.15), xlab="Population growth rate", ylab="P(conflict)")
lines(hypDat$pgr, preg4$fit+1.96*preg4$se.fit, type="l", lty=2)
lines(hypDat$pgr, preg4$fit-1.96*preg4$se.fit, type="l", lty=2)

dev.off()

preg4$fit
hypDat$pgr
hypDat$pgr[238]
hypDat$pgr[138]
preg4$fit[238]
preg4$fit[138]

preg4$fit[138] / preg4$fit[238]

######################
#     Figure 8       #
#  Just peri-urban   #
######################

urban4Redux <- subset(urban4, periUrban41 == 1)

reg4Redux <- glm(scad1.6>0 ~ pgr + grp2 + lagddr + droughtProxy5
	+ gridYear + infraProxy14 
	+ gridDistToCapKm +  cgp + scadLag,
	family=binomial,data=urban4Redux)

hypDatRedux = data.frame(
	lagddr=median(urban4Redux$lagddr,na.rm=T), 
	grp2=median(urban4Redux$grp, na.rm=T), 
	pgr=seq(min(urban4Redux$pgr, na.rm=T), 
	max(urban4Redux$pgr, na.rm=T), by=0.05), 
	droughtProxy5=median(urban4Redux$droughtProxy5, na.rm=T), 
	gridYear=median(urban4Redux$gridYear, na.rm=T), 
	infraProxy14=median(urban4Redux$infraProxy14, na.rm=T), 
	gridDistToCapKm=median(urban4Redux$gridDistToCapKm, na.rm=T), 
	cgp=median(urban4Redux$cgp, na.rm=T), 
	scadLag=median(urban4Redux$scadLag, na.rm=T))

preg4Redux <- predict(reg4Redux, newdata=hypDatRedux, type="response", se.fit=T)

png("fig8.png", width=1400, height=1400, res=150)

plot(hypDatRedux$pgr, preg4Redux$fit, type="l", lwd=3, ylim=c(-0.01,0.15), xlab="Population growth rate", ylab="P(conflict)")
lines(hypDatRedux$pgr, preg4Redux$fit+1.96*preg4Redux$se.fit, type="l", lty=2)
lines(hypDatRedux$pgr, preg4Redux$fit-1.96*preg4Redux$se.fit, type="l", lty=2)

dev.off()

preg4Redux$fit
hypDatRedux$pgr
hypDatRedux$pgr[238]
hypDatRedux$pgr[138]
preg4Redux$fit[238]
preg4Redux$fit[138]

preg4Redux$fit[138] / preg4Redux$fit[238]


######################
#      Table 3       #
#  (48 month data)   #
######################

# Reload urban 4 to use the 48 month data

	urban4 <- read.table("urban4_v_023_48months.csv", sep=",",header=T)

# logit, including spatial lag variable, scadLag

	print('# logit, including spatial lag variable, scadLag')
	reg4 <- glm(scad1.6>0 ~ pgr +  grp2 +  urban1 + lagddr + droughtProxy5
	+ gridYear + infraProxy14 
	+ gridDistToCapKm +  cgp + scadLag,
    	family=binomial,data=urban4)

	summary(reg4)
	round(summary(reg4)$coef,3)

#logit base model excluding drought variables

	print('#logit based model excluding drought variables')
	reg4bm <-glm(scad1.6>0 ~ pgr +  grp2 +  urban1 
	+ gridYear + infraProxy14 
	+ gridDistToCapKm + cgp + scadLag,
	family=binomial,data=urban4)

	summary(reg4a)
	round(summary(reg4a)$coef,3)
	
# logit, periUrban41 only

	print('# logit, periUrban41 only')
	reg4.pU <- glm(acledProtestsAndRiotsCount>0 ~ pgr +  grp2 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + acledProtestsAndRiotsSpatialLag,
		family=binomial,data=urban4,subset=periUrban41==1)
	
	summary(reg4.pU)
	round(summary(reg4.pU)$coef,3)

# logit, urban 1 only

	print('# logit, urban 1 only')
	reg4.u1 <- glm(acledProtestsAndRiotsCount>0 ~ pgr + grp2 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + acledProtestsAndRiotsSpatialLag,
		family=binomial,data=urban4,subset=urban1==1)
	
	summary(reg4.u1)
	round(summary(reg4.u1)$coef,3)


# Format table

# Output to screen

	stargazer(reg4bm, reg4, reg4.pU, reg4.u1, type="text", dep.var.labels.include = FALSE, column.labels = c("SCAD", "SCAD", "ACLED", "ACLED"), covariate.labels = c("Grid pop growth", "Grid population", "Urban core", "Distance from drought area (lag) (past 48 months)", "Drought (past 48 months)", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP grid per capita", "SCAD/ ACLED (lag)", "Constant"))
	
# Output to html file
	
	stargazer(reg4bm, reg4, reg4.pU, reg4.u1, type="html", out="table3.html", dep.var.labels.include = FALSE, column.labels = c("SCAD", "SCAD", "ACLED", "ACLED"), covariate.labels = c("Grid pop growth", "Grid population", "Urban core", "Distance from drought area (lag) (past 48 months)", "Drought (past 48 months)", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP grid per capita", "SCAD/ ACLED (lag)", "Constant"))


######################
#      Appendix      #
#       Tables       #
######################

# Reload the 12 month data 

	urban4 <- read.table("urban4_v_023_12months.csv", sep=",",header=T)
	
	
######################
#      Table A2      #
######################

# logit base model excluding 2 drought variables

	print('#logit based model excluding 2 drought variables')
	reg4a <-glm(scad1.6>0 ~  pgr + grp2 +  urban1 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + scadLag,
		family=binomial,data=urban4)

	summary(reg4a)
	round(summary(reg4a)$coef,3)

# logit with interaction pgr and cgp, plus ethnicities

	print('# logit with ethnicity count')
	reg4liEth <- glm(scad1.6>0 ~ grp2 + pgr +  urban1 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm +  cgp +  scadLag + ethnicityCount,
    		family=binomial,data=urban4)
	
	summary(reg4liEth)
	
# logit with interaction pgr and cgp, plus ethnicities cubed

	urban4$ethnicityCubed <- urban4$ethnicityCount^3

	print('# logit with ethnicity cubed')
	reg4liEthC <- glm(scad1.6>0 ~ grp2 + pgr +  urban1 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm +  cgp +  scadLag + ethnicityCubed,
    		family=binomial,data=urban4)
	
	summary(reg4liEthC)

# logit with excluded ethnicities


	print('# logit with xcluded ethnicities')
	reg4liExc <- glm(scad1.6>0 ~ grp2 + pgr +  urban1 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm +  cgp +  scadLag + excludedEthnicityBinary,
    		family=binomial,data=urban4)
	
	summary(reg4liExc)

# Format table

# Output to screen

	stargazer(reg4a, reg4liEth, reg4liEthC, reg4liExc, type="text", covariate.labels = c("Grid pop growth", "Grid population", "Urban core", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP grid per capita", "SCAD (lag)", "GeoEPR number of ethnic groups", "GeoEPR number of ethnic groups cubed", "GeoEPR excluded ethnic group(s) (binary)", "Constant"))

# Output to html file

	stargazer(reg4a, reg4liEth, reg4liEthC, reg4liExc, type="html", out="tableA2.html", covariate.labels = c("Grid pop growth", "Grid population", "Urban core", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP grid per capita", "SCAD (lag)", "GeoEPR number of ethnic groups", "GeoEPR number of ethnic groups cubed", "GeoEPR excluded ethnic group(s) (binary)", "Constant"))


######################
#  Appendix Table A3 #
#  Robustness checks # 
#     (12 months)    #
######################

# appendix table 3, model 1

	a31 <-glm(scad1.6>0 ~ grp2 + pgr + urban1 
		    + gridYear + infraProxy14 
		    + gridDistToCapKm + cgp + scadLag,
		    family=binomial,data=urban4)
	
	summary(a31)
	round(summary(a31)$coef,3)

# appendix table 3, model 3
# gni, not grid gdp

	a33 <-glm(scad1.6>0 ~ grp2 + pgr + urban1 
		    + gridYear + infraProxy14 
		    + gridDistToCapKm + scadLag + gni,
		    family=binomial,data=urban4)
	
	summary(a33)
	round(summary(a33)$coef,3)

# appendix table 3, model 4
# distance from country conflicts

	a34 <-glm(scad1.6>0 ~ grp2 + pgr + urban1 
		    + gridYear + infraProxy14 
		    + gridDistToCapKm + cgp + scadLag,
		    family=binomial,data=urban4)
	
	summary(a34)
	round(summary(a34)$coef,3)

# appendix table 3, model 5 
# political stability

	a35 <-glm(scad1.6>0 ~ grp2 + pgr + urban1 
		    + gridYear + infraProxy14 
		    + gridDistToCapKm + cgp + scadLag + politicalStability, 
		    family=binomial,data=urban4)

	summary(a35) 
	round(summary(a35)$coef,3) 
	
# appendix table 3, model 6 
# government effectiveness

	a36 <-glm(scad1.6>0 ~ grp2 + pgr + urban1 
		    + gridYear + infraProxy14 
		    + gridDistToCapKm + cgp + scadLag + politicalStability
		    + governmentEffectiveness, 
		    family=binomial,data=urban4)

	summary(a36) 
	round(summary(a36)$coef,3) 
	
# appendix table 3, model 7 
# voice and accountability

	a37 <-glm(scad1.6>0 ~ grp2 + pgr + urban1 
		    + gridYear + infraProxy14 
		    + gridDistToCapKm + cgp + scadLag + politicalStability 
		    + governmentEffectiveness + voiceAndAccountability, family=binomial,data=urban4)

	summary(a37) 
	round(summary(a37)$coef,3) 
	
# appendix table 3, model 8 
# global food price index

	a38 <-glm(scad1.6>0 ~ grp2 + pgr + urban1 
		    + gridYear + infraProxy14 
		    + gridDistToCapKm + cgp + scadLag + politicalStability 
		    + governmentEffectiveness + voiceAndAccountability
		    + annualGlobalFoodPriceIndex, family=binomial,data=urban4)

	summary(a38) 
	round(summary(a38)$coef,3)

# Format table

# Output to screen

	stargazer(a31, a33, a34, a35, a36, a37, a38, type="text", covariate.labels = c("Grid population", "Grid pop growth", "Urban core", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP per capita", "SCAD (lag)", "GDI", "Political stability", "Government effectiveness", "Voice and accountability", "Food price index"))

# Output to html file 

	stargazer(a31, a33, a34, a35, a36, a37, a38, type="html", out="tableA3.html", covariate.labels = c("Grid population", "Grid pop growth", "Urban core", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP per capita", "SCAD (lag)", "GDI", "Political stability", "Government effectiveness", "Voice and accountability", "Food price index"))
	

######################
#  Appendix Table A4 #
#  (reproduction of  #
#    Table A2, but   #
#   with ACLED data  #
#   instead of SCAD) #
######################

# Reload urban 4 to use the 12 month data

	urban4 <- read.table("urban4_v_023_12months.csv", sep=",",header=T)
	
# logit, including spatial lag variable, acledProtestsAndRiotsSpatialLag

	print('# logit, including spatial lag variable, acledProtestsAndRiotsSpatialLag')
	reg4 <- glm(acledProtestsAndRiotsCount>0 ~ pgr + grp2 + urban1 + lagddr + droughtProxy5
		+ gridYear + infraProxy14 
		+ gridDistToCapKm +  cgp + acledProtestsAndRiotsSpatialLag,
    		family=binomial,data=urban4)

	summary(reg4)
	round(summary(reg4)$coef,3)

# logit base model excluding 2 drought variables

	print('#logit based model excluding 2 drought variables')
	reg4a <-glm(acledProtestsAndRiotsCount>0 ~  pgr + grp2 +  urban1 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + acledProtestsAndRiotsSpatialLag,
		family=binomial,data=urban4)

	summary(reg4a)
	round(summary(reg4a)$coef,3)


# logit, periUrban41 only

	print('# logit, periUrban41 only')
	reg4.pU <- glm(acledProtestsAndRiotsCount>0 ~ pgr +  grp2 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + acledProtestsAndRiotsSpatialLag,
		family=binomial,data=urban4,subset=periUrban41==1)
	
	summary(reg4.pU)
	round(summary(reg4.pU)$coef,3)

# logit, urban 1 only

	print('# logit, urban 1 only')
	reg4.u1 <- glm(acledProtestsAndRiotsCount>0 ~ pgr + grp2 
		+ gridYear + infraProxy14 
		+ gridDistToCapKm + cgp + acledProtestsAndRiotsSpatialLag,
		family=binomial,data=urban4,subset=urban1==1)
	
	summary(reg4.u1)
	round(summary(reg4.u1)$coef,3)

# Format table

# Output to screen

	stargazer(reg4a, reg4, reg4.pU, reg4.u1, type="text", covariate.labels = c("Grid pop growth", "Grid population", "Urban core", "Distance from drought area (lag) (past 12 months)", "Drought (past 12 months)", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP grid per capita", "ACLED (lag)", "Constant"))

# Output to html file

	stargazer(reg4a, reg4, reg4.pU, reg4.u1, type="html", out="tableA4.html", covariate.labels = c("Grid pop growth", "Grid population", "Urban core", "Distance from drought area (lag) (past 12 months)", "Drought (past 12 months)", "Grid year", "Density of infrastructure", "Grid distance to capital (km)", "Log GDP grid per capita", "ACLED (lag)", "Constant"))

